#coding:utf8
import codecs

import re
# Stop-word list loaded once at import time: one entry per line of
# data/stopwords (decoded as UTF-8), with surrounding whitespace stripped.
# The ``with`` block closes the file handle as soon as the list is built.
with codecs.open('data/stopwords', 'r', encoding='utf8') as _stopwords_file:
    stopwords = [w.strip() for w in _stopwords_file.readlines()]


# A slash followed by ASCII letters glued to a token (presumably a
# part-of-speech tag such as the "/n" in "word/n").
_TAG_RE = re.compile(r'/[a-zA-Z]+')

# Numeric id of the form 19980101-01-001-001 together with the whitespace
# that follows it.
_LINE_ID_RE = re.compile(r'\d{8}-\d{2}-\d{3}-\d{3}\s+')

# Punctuation characters listed in the original pattern. The original wrote
# them directly inside "[...]" without escaping "]", which closed the
# character class after its first character and left a pattern that could
# never match. Escaping every character keeps "[", "]", "-", "^" and "\"
# literal members of the class.
_PUNCTUATION = "[],.!?/\\:;\"'_+=-@#$%^&*()<>《》，。？、：；“”‘’！…（）—"
_PUNCTUATION_LINE_RE = re.compile("^[" + re.escape(_PUNCTUATION) + "]+$")


def dealRM(filename, source="data/199801.txt", encoding=None):
    """Strip annotations from a tagged corpus and append the plain text.

    Every line of ``source`` is processed in order:

    1. each "/<letters>" tag is removed;
    2. each id of the form ``19980101-01-001-001`` (plus the whitespace
       after it) is removed;
    3. a line made up solely of punctuation characters is emptied;
    4. the remaining tokens are re-joined with single spaces.

    One output line is written per input line, so blank or emptied input
    lines produce blank output lines.

    Args:
        filename: Path of the output file. It is opened in append mode, so
            repeated calls add to whatever the file already contains.
        source: Path of the tagged input corpus. Defaults to the path that
            used to be hard-coded, so existing ``dealRM(filename)`` calls
            read the same file as before.
        encoding: Encoding handed to ``codecs.open`` for both files. The
            default ``None`` keeps the previous behaviour of not
            specifying one.
    """
    # Context managers guarantee both handles are closed (and the output
    # flushed) even if processing fails part-way through.
    with codecs.open(source, "r", encoding=encoding) as src:
        with codecs.open(filename, "a", encoding=encoding) as dst:
            for line in src:
                line = _TAG_RE.sub('', line)
                line = _LINE_ID_RE.sub('', line)
                line = _PUNCTUATION_LINE_RE.sub('', line)
                # split() with no argument drops all runs of whitespace,
                # including the trailing newline.
                dst.write(" ".join(line.split()) + "\n")

# Script driver: runs unconditionally when this module is executed or
# imported, and passes data/rm.txt to dealRM as the output path.
filename = "data/rm.txt"
dealRM(filename)